
filename cars72 'D:\Jim\CEXdata\Cars\cars_data\09034-0012-Data.txt';
filename cars73 'D:\Jim\CEXdata\Cars\cars_data\09034-0013-Data.txt';
**************************************************************************
Section 1
READ IN RAW DATA
**************************************************************************;
* Read the 1972 raw file and keep only record type 25, the record type whose
  columns hold the vehicle fields listed below.
  The trailing @ on the first INPUT holds the current line, so the fields are
  read from the SAME record whose rec_no was just tested.  Without the @ the
  second INPUT would read the following line of the file instead.
  NOTE(review): observation counts quoted in later comments were produced
  before this correction and should be re-checked after rerunning;
data auto72;
   infile cars72;
   input rec_no 11-12 @;
   * Skip every other record type - DELETE also releases the held line;
   if rec_no NE 25 then delete;
   input
      newid 1-5 vehicib $ 14-15 vehicyb 16-17 model 18-19 vmake 20-21 autotran 24
      pwrsteer 25 pwrbrake 26
      aircar 27 vehnewu 34 vehgftc 35 vehpurm1 $ 36-37 vehpury1 $ 38-39
      tradex1 $ 40-46 netpurx1 $ 47-53;

   year=1972;
   * Prefix the survey year so ids from the two yearly files cannot collide;
   newid=1000000*year+newid;
run;
* Summary statistics as a quick check of the read;
proc means data=auto72;
run;

* Read the 1973 raw file and keep only record type 25, using the same column
  layout as the 1972 file.
  The trailing @ on the first INPUT holds the current line, so the fields are
  read from the SAME record whose rec_no was just tested.  Without the @ the
  second INPUT would read the following line of the file instead.
  NOTE(review): observation counts quoted in later comments were produced
  before this correction and should be re-checked after rerunning;
data auto73;
   infile cars73;
   input rec_no 11-12 @;
   * Skip every other record type - DELETE also releases the held line;
   if rec_no NE 25 then delete;
   input
      newid 1-5 vehicib $ 14-15 vehicyb 16-17 model 18-19 vmake 20-21 autotran 24
      pwrsteer 25 pwrbrake 26
      aircar 27 vehnewu 34 vehgftc 35 vehpurm1 $ 36-37 vehpury1 $ 38-39
      tradex1 $ 40-46 netpurx1 $ 47-53;

   year=1973;
   * Prefix the survey year so ids from the two yearly files cannot collide;
   newid=1000000*year+newid;
run;
* Summary statistics as a quick check of the read;
proc means data=auto73;
run;

**************************************************************************
Section 2
**************************************************************************;

* Stack the 1972 and 1973 vehicle records and derive cleaned numeric purchase
  month, purchase year, price, make and model group codes, and months owned;
Data auto7273;
Set auto72 auto73;
* Sequential row number - used later as the key when predictions are merged back;
id=_N_;
*create vehpuryr, vehpurmo, netpurx, and tradex that are valid numeric variables;
* NOTE(review): the four raw fields are character, so assigning a numeric
  missing relies on implicit numeric-to-character conversion - the later
  arithmetic then converts the stored text back to a numeric missing.
  Only fields that equal the single symbol (blank padded) are matched here;
If vehpury1='&' Then vehpury1=.;
If vehpury1='-' Then vehpury1=.;
If vehpury1='+' Then vehpury1=.;

If vehpurm1='&' Then vehpurm1=.;
If vehpurm1='-' Then vehpurm1=.;
If vehpurm1='+' Then vehpurm1=.;

If netpurx1='&' Then netpurx1=.;
If netpurx1='-' Then netpurx1=.;
If netpurx1='+' Then netpurx1=.;

If tradex1='&' Then tradex1=.;
If tradex1='-' Then tradex1=.;
If tradex1='+' Then tradex1=.;

* Multiplying by 1 forces the character-to-numeric conversion;
vehpurmo=vehpurm1*1;
*86 observations with vehpurmo=0 and 3397 observations with vehpurmo>12;
* Out-of-range months are set to June.  A missing month is NOT changed here;
If vehpurmo=0 or vehpurmo GT 12 Then vehpurmo=6;

vehpuryr=vehpury1*1;

*for vehpuryr, 255 obs =74, 6 obs =88, 66 obs =0, and 49 obs between 1 and 9;
* The LT 10 test is also true for a missing value, which simply stays missing;
IF vehpuryr=0 OR vehpuryr=88 OR vehpuryr LT 10 Then vehpuryr=.;
IF vehpuryr=74 THEN vehpuryr=73;

*must divide by 100 b/c implied 2 decimal places;
netpurx=netpurx1/100;
tradex=tradex1/100;
if tradex=. then tradex=0;

* DO NOT INCLUDE TRADEX BECAUSE ONLY 1384 OBS WHERE BOTH TRADEX AND NETPURX ARE 
  POSITIVE AND THEY DO NOT EQUAL EACH OTHER;
* FOR SOME REASON, IT LOOKS LIKE NETPURX HAS IMPLIED DECIMAL WHEN VEHPURYR=. BUT NO
  IMPLIED DECIMAL WHEN VEHPURYR IS NOT MISSING;
* When vehpuryr is present and netpurx is above 130 no branch fires and
  totpurx stays missing;
IF vehpuryr=. AND netpurx GT 100 THEN totpurx=netpurx;
   ELSE IF vehpuryr=. THEN totpurx=.; 
   *  ALL BUT 5 OR SO OBS ARE LE 130 IN THIS CASE AND LESS THAN 2% OF OBS WITH VEHPURYR
      EQUAL TO MISSING ARE BELOW 130;
   ELSE IF vehpuryr NE . AND netpurx LE 130 THEN totpurx=netpurx*100; 

* There are 20 different assigned vehicle groups 
  The codebook gives specific designation for 1,11,...91
  There is no designation for 7,17...97, but proc freq shows these are common groups
  Group model=., model= unassigned # into a single missing group = 22;
* Resulting codes: 1-10 for model 1,11,...,91 - 11-20 for model 7,17,...,97 -
  21 for model 92 - 0 for model 0 - 22 for everything else;
model_adj=22;
DO t=1 TO 10;
   IF model=(t-1)*10+1 THEN model_adj=t;
   IF model=(t-1)*10+7 THEN model_adj=t+10;
END;
DROP t;
IF model=92 THEN model_adj=21;
IF model=0 THEN model_adj=0;


* Recode the raw make codes to consecutive integers 1-29.  Any vmake value
  not listed below leaves vmake_adj, and therefore make_model, missing;
IF vmake=1 THEN vmake_adj=1;
   ELSE IF vmake=2 THEN vmake_adj=2;
   ELSE IF vmake=3 THEN vmake_adj=3;
   ELSE IF vmake=11 THEN vmake_adj=4;
   ELSE IF vmake=12 THEN vmake_adj=5;
   ELSE IF vmake=13 THEN vmake_adj=6;
   ELSE IF vmake=14 THEN vmake_adj=7;
   ELSE IF vmake=15 THEN vmake_adj=8;
   ELSE IF vmake=21 THEN vmake_adj=9;
   ELSE IF vmake=22 THEN vmake_adj=10;
   ELSE IF vmake=31 THEN vmake_adj=11;
   ELSE IF vmake=32 THEN vmake_adj=12;
   ELSE IF vmake=33 THEN vmake_adj=13;
   ELSE IF vmake=41 THEN vmake_adj=14;
   ELSE IF vmake=42 THEN vmake_adj=15;
   ELSE IF vmake=51 THEN vmake_adj=16;
   ELSE IF vmake=61 THEN vmake_adj=17;
   ELSE IF vmake=62 THEN vmake_adj=18;
   ELSE IF vmake=63 THEN vmake_adj=19;
   ELSE IF vmake=64 THEN vmake_adj=20;
   ELSE IF vmake=65 THEN vmake_adj=21;
   ELSE IF vmake=66 THEN vmake_adj=22;
   ELSE IF vmake=67 THEN vmake_adj=23;
   ELSE IF vmake=71 THEN vmake_adj=24;
   ELSE IF vmake=72 THEN vmake_adj=25;
   ELSE IF vmake=73 THEN vmake_adj=26;
   ELSE IF vmake=74 THEN vmake_adj=27;
   ELSE IF vmake=88 THEN vmake_adj=28;
   ELSE IF vmake IN(98,99,.) THEN vmake_adj=29;
   
*create a specific make_model variable for each vehicle based on make and model;
make_model=100*vmake_adj+model_adj;

*create a year dummy equal unity in 1973;
If year=1973 then yeardummy=1;
else yeardummy=0;

* Convert the two-digit purchase year to four digits;
vehpuryr=1900+vehpuryr;

* CODEBOOK STATES THAT VEHICLE VALUE ONLY RECORDED IF VEHICLE PURCHASED IN SURVEY YEAR;
* DELETE THIS LINE, B/C IT CAUSES TOO MANY CARS (ABOUT HALF) TO LOOK LIKE THEY HAVE BEEN
  PURCHASED IN THE PAST YEAR.  INSTEAD, WE WILL IMPUTE THAT AGE AS 3 YEARS WHEN VEHPURYR MISSING;
* IF vehpuryr=. AND netpurx GT 0 THEN vehpuryr=year;

*Creating an own_for (months owned for) variable for the observations where vehicle purchase year (vehpuryr) is not missing;
* Months are counted from the first of the purchase month to June 1 of the
  survey year, floored at zero.
  NOTE(review): MAX ignores missing arguments, so when vehpurmo is missing
  (MDY then returns missing) own_for becomes 0 instead of missing - confirm
  whether such rows should be imputed to June or left missing;
If vehpuryr NE . then own_for=MAX(0,INTCK('MONTH',MDY(vehpurmo,1,vehpuryr),MDY(6,1,year)));


RUN;
*  VEHICLE PURCHASE YEAR IS MISSING FOR 11,182 OBS.  SO WE WILL FIRST IMPUTE AN OWN_FOR
   FOR EACH VEHICLE BASED ON MAKE AND MODEL GROUP, AND THEN USE THIS OWN_FOR TO CALCULATE
   A PREDICTED PURCHASE YEAR.  THIS WILL ADD AN OWN_FOR FOR ANOTHER 3,700 OBS;







* Order the vehicles by make-model group for the BY-group median below;
proc sort data=auto7273;
   by make_model;
run;

* Median months owned within each make-model group, over observed values only.
  Output goes to cartemp1 and nothing is printed;
proc means data=auto7273(where=(own_for NE .)) noprint median;
   by make_model;
   var own_for;
   output out=cartemp1 median=own_med;
run;

* Overall median months owned over observed values only.
  This one is printed and also saved to cartemp2;
proc means data=auto7273(where=(own_for NE .)) median;
   var own_for;
   output out=cartemp2 median=own_med2;
run;


/*
DATA auto7273;
MERGE auto7273(IN=in1) 
      cartemp1(KEEP=make_model own_med _FREQ_ RENAME=(_FREQ_=num_mod_grp));
BY make_model;
IF in1;

IF num_mod_grp GE 3 AND own_for=. THEN own_for=own_med;
RUN;
*/
*  FOR THE REMAINING 7,400 MISSING OBS ON OWN_FOR, WE GIVE THEM THE MEDIAN ACROSS ALL OBSERVED;





* Derive years owned, year of manufacture (yrmade) and vehicle age (vehage)
  from the months-owned variable;
data auto7273;

   * Disabled imputation of the overall median, kept for reference:
     IF _N_=1 THEN SET cartemp2(KEEP=own_med2);
   set auto7273;

   * Disabled:  IF own_for=. THEN own_for=own_med2;

   own_for = round(own_for, 1);
   yrs_owned = own_for / 12;

   * model=0 is the pre-1968 code, so the implied year is capped at 1967.
     Any other non-missing model code is floored at 1968.
     With no model code the survey year minus years owned is used as is;
   if model = . then yrmade = year - yrs_owned;
   else if yrs_owned NE . then do;
      if model = 0 then yrmade = min(1967, year - yrs_owned);
      else yrmade = max(1968, year - yrs_owned);
   end;

   * Vehicles bought used (vehnewu=2) are assumed to be 4 years older;
   if vehnewu = 2 then yrmade = yrmade - 4;

   yrmade = round(yrmade, 1);

   * About 10% have yrmade LE 1959 up to this point - floor them at 1959;
   if yrmade NE . then yrmade = max(1959, yrmade);

   * Back out a purchase year from months owned where it is still missing;
   if vehpuryr = . and own_for NE . then vehpuryr = max(0, year - round(yrs_owned));

   * Vehicle age, with -1 raised to 0 and anything above 20 capped at 20;
   vehage = year - yrmade;
   if vehage = -1 then vehage = 0;
   else if vehage GT 20 then vehage = 20;

run;


**************************************************************************
Section 3
**************************************************************************;

*Add demographics;
* Load the demographic file and rebuild newid in the same year-prefixed form
  that the vehicle data uses;
data demographics;
   set carslib.demog7273;
   newid = newid - 99000000 + 1000000*year;
run;

* Both inputs of the upcoming merge must be in newid order;
proc sort data=demographics;
   by newid;
run;

proc sort data=auto7273;
   by newid;
run;

* Attach demographics to every vehicle record and build the regressors:
  squared terms, model/make/year-made dummies and the equipment dummies.
  Only vehicle records are kept (in1) - demographic rows with no vehicle drop out;
DATA auto7273_3;
merge auto7273(in=in1) demographics;
by newid;
if in1;
expend2=expend*expend;
age_ref2=age_ref*age_ref;


*Generate dummy variables for models.  MODEL_ADJ TAKES ON VALUES 0-22;
* model_dum(t) flags model_adj = t-1 because the codes start at zero;
ARRAY model_dum(1:23);

DO t=1 TO 23;
   IF model_adj=(t-1) THEN model_dum(t)=1;
   ELSE model_dum(t)=0;
END;
DROP t;

*Generate dummy variables for vmake.  VMAKE_ADJ TAKES ON VALUES 1-29;
ARRAY vmake_dum(1:29);

DO t=1 TO 29;
   IF vmake_adj=t THEN vmake_dum(t)=1;
   ELSE vmake_dum(t)=0;
END;
DROP t;

*Create Dummy Variables for Time Trends;
* yrmade_dum1966 covers 1966 and earlier - yrdum1-yrdum7 cover 1967 to 1973;

If yrmade LE 1966 then yrmade_dum1966=1;
	else yrmade_dum1966=0;
Array yrdum(1:7);
Do t=1 to 7;
If yrmade=t+1966 then yrdum(t)=1;
	else yrdum(t)=0;
end;

*Create Dummy for New/Used Car
Vehnewu is variable for new or used car;

If vehnewu=1 then vehnewu_dum=1;
	else vehnewu_dum=0;

* Each equipment item below yields three variables:
    nm_<item> = 1 when the raw field is non-missing
    <flag>    = 1 yes, 0 no, missing when the raw code is neither
    <flag>x   = the flag with missing replaced by 0, for use as a regressor;

*Create Dummy for Automatic Transmission;
IF autotran NE . THEN nm_autotran=1;
	Else nm_autotran=0;
IF autotran=1 THEN at=1;
	ELSE IF autotran=2 THEN at=0;
	ELSE at=.;
IF nm_autotran=1 THEN atx=at;
	ELSE atx=0;
If at=. then atx=0;
*Create Dummy for Power Steering;
IF pwrsteer NE . THEN nm_pwrsteer=1;
	Else nm_pwrsteer=0;
IF pwrsteer=3 THEN ps=1;
	ELSE IF pwrsteer=4 THEN ps=0;
	ELSE ps=.;
IF nm_pwrsteer=1 THEN psx=ps;
	ELSE psx=0;
If ps=. then psx=0;
*Create Dummy for Power Brake;
IF pwrbrake NE . THEN nm_pwrbrake=1;
	Else nm_pwrbrake=0;
IF pwrbrake=5 THEN pb=1;
	ELSE IF pwrbrake=6 THEN pb=0;
	ELSE pb=.;
* FIX: this test used nm_pwrsteer, which zeroed pbx for vehicles that report
  power brakes but have a missing power-steering field;
IF nm_pwrbrake=1 THEN pbx=pb;
	ELSE pbx=0;
If pb=. then pbx=0;
*Create Dummy for Air Conditioning;
IF aircar NE . THEN nm_aircar=1;
	Else nm_aircar=0;
IF aircar=1 THEN ac=1;
	ELSE IF aircar=2 THEN ac=0;
	ELSE ac=.;
IF nm_aircar=1 THEN acx=ac;
	ELSE acx=0;
If ac=. then acx=0;

* Polynomial terms in vehicle age;
vehage_2=vehage*vehage;
vehage_3=vehage*vehage*vehage;


RUN;

* Summary statistics of the merged file;
PROC MEANS DATA=auto7273_3;
RUN;

* Diagnostic: ownership length and price variables by purchase year;
PROC SORT DATA=auto7273_3;
   BY vehpuryr;
RUN;

PROC MEANS DATA=auto7273_3;
   BY vehpuryr;
   VAR own_for totpurx netpurx tradex;
RUN;


**************************************************************************
Section 7
**************************************************************************;
*Creating an estimation sample;
*sample consists of observations that have net purchase price and are bought for oneself;
*Estimation sample for vehicles purchased in last 18 months; 
* Estimation sample: price above 100, vehgftc not equal to 3, owned for
  0 to 18 months and purchased in 1972 or later;
data estimation7273;
   set auto7273_3;
   where totpurx GT 100
     and vehgftc NE 3
     and 0<=own_for<=18
     and vehpuryr GE 1972;
run;

* Summary statistics of the estimation sample;
proc means data=estimation7273;
run;


PROC SORT DATA=estimation7273;
   BY vmake_adj model_adj yrmade;
RUN;

* One row per make / model / year-made combination that occurs in the
  estimation sample, flagged with mmy_match=1.
  FIRST.yrmade is set whenever any earlier BY variable changes, so it alone
  identifies the first row of each combination;
DATA match1;
   SET estimation7273;
   BY vmake_adj model_adj yrmade;
   KEEP vmake_adj model_adj yrmade mmy_match;
   IF FIRST.yrmade;
   mmy_match=1;
RUN;

*Create a comprehensive list in estimation sample of all possible combinations of 
vmake and yrmade;
PROC SORT DATA=estimation7273;
   BY vmake_adj yrmade;
RUN;

* One row per make / year-made combination that occurs in the estimation
  sample, flagged with may_match=1.
  FIRST.yrmade is set whenever vmake_adj changes, so it alone identifies the
  first row of each combination;
DATA match2;
   SET estimation7273;
   BY vmake_adj yrmade;
   KEEP vmake_adj yrmade may_match;
   IF FIRST.yrmade;
   may_match=1;
RUN;
*Merge Match Variables with rest of Master Data Set;
*Merging estimation match 1 and the master by
vmake model yrmade;
* Step 1: attach the make / model / year-made flag (mmy_match) to the master
  file, keeping every master row and no flag-only rows;
PROC SORT DATA=auto7273_3;
   BY vmake_adj model_adj yrmade;
RUN;

DATA auto7273_4;
   MERGE auto7273_3(IN=in_master) match1;
   BY vmake_adj model_adj yrmade;
   IF NOT in_master THEN DELETE;
RUN;

* Step 2: attach the make / year-made flag (may_match) the same way;
PROC SORT DATA=auto7273_4;
   BY vmake_adj yrmade;
RUN;

DATA auto7273_4;
   MERGE auto7273_4(IN=in_master) match2;
   BY vmake_adj yrmade;
   IF NOT in_master THEN DELETE;
RUN;
*Group observations by match characteristics;
* Assign every vehicle to one of five groups and build the log and
  1973-dollar variables used by the regressions;
data carsfinal;
   set auto7273_4;

   * The first rule that matches wins:
       1 = estimation sample (same filter as estimation7273)
       2 = price reported, purchased 1969-1971, vehicle age known
       3 = no usable price, make / model / year-made match in estimation sample
       4 = no usable price, make / year-made match in estimation sample
       5 = everything else;
   select;
      when (totpurx GT 100 AND vehgftc NE 3 and 0<=own_for<=18 and vehpuryr GE 1972)
         group=1;
      when (1969 LE vehpuryr LE 1971 AND totpurx GT 100 AND vehage NE .)
         group=2;
      when (totpurx LE 100 AND vehpuryr GE 1959 AND mmy_match=1)
         group=3;
      when (totpurx LE 100 AND vehpuryr GE 1959 AND may_match=1)
         group=4;
      otherwise
         group=5;
   end;

   ln_expend = log(expend);

   * Numeric identifiers: make-model-year-made (mmy_id2) and make-year-made (makeyr_id);
   mmy_id2   = 10000*make_model + yrmade;
   makeyr_id = 10000*vmake_adj + yrmade;

   * Put 1972 expenditures and car values in 1973 dollars.
     1.065 is the CPI-U-RS adjustment and 1.0458 the cars CPI-U adjustment;
   if year = 1972 then do;
      r_expend  = expend*1.065;
      r_totpurx = totpurx*1.0458;
   end;
   else do;
      r_expend  = expend;
      r_totpurx = totpurx;
   end;

   * The log price is only defined for the estimation sample;
   if group = 1 then ln_rtotpurx = log(r_totpurx);
   ln_r_expend = log(r_expend);

run;

* Record counts by survey year;
proc freq data=carsfinal;
   table year;
run;


**************************************************************************
Section 8
**************************************************************************;
* Sort by the make-model-year-made id for the BY-group means and the merge below;
proc sort data=carsfinal;
by mmy_id2;
run;
*Calculating the means for the estimation sample to compute predicted values;
* avg1-avg25 follow the order of the 25 VAR variables.  mmy_std1 and mmy_std2
  are the standard deviations of the first two of them (ln_rtotpurx and vehage);
Proc means noprint Data=carsfinal;
by mmy_id2;
where group IN(1);
  VAR ln_rtotpurx vehage vehage_2 vehage_3 nm_autotran nm_pwrbrake nm_aircar nm_pwrsteer 
		 atx psx pbx acx 
		ln_r_expend ed1 ed2 ed3 fam1 fam2 fam3 fam_size age_ref age_ref2 regn1-regn3;
   OUTPUT OUT=mean_dat MEAN=avg1-avg25 STD=mmy_std1 mmy_std2;
   run;
* Attach the group means to every vehicle (all groups) and express each
  regressor as a deviation from its group-1 mean.  mmy_count is the number
  of estimation-sample vehicles in the group.
  The three arrays must list their variables in the same order as the VAR
  statement above because they are paired by position;
DATA carsfinal2;
   MERGE carsfinal(IN=in1) 
		 mean_dat(KEEP=mmy_id2 avg1-avg25 mmy_std1 mmy_std2 _FREQ_ RENAME=(_FREQ_=mmy_count));
   BY mmy_id2;
	if in1;
ARRAY regvar(1:25) ln_rtotpurx vehage vehage_2 vehage_3 nm_autotran 
						nm_pwrbrake nm_aircar nm_pwrsteer  atx psx pbx acx  
						ln_r_expend ed1 ed2 ed3 fam1 fam2 fam3 fam_size age_ref 
						age_ref2 regn1-regn3;
ARRAY varmean(1:25) avg1-avg25;
ARRAY d_mean(1:25)	d_ln_rtotpurx d_vehage d_vehage_2 d_vehage_3 d_nm_autotran 
						d_nm_pwrbrake d_nm_aircar d_nm_pwrsteer  
						 d_atx d_psx d_pbx d_acx 
						 d_ln_r_expend d_ed1 d_ed2 d_ed3 d_fam1 d_fam2 d_fam3 
						d_fam_size d_age_ref d_age_ref2 d_regn1-d_regn3;

* Rows whose mmy_id2 has no estimation-sample match have missing means and
  therefore missing deviations;
DO t=1 TO 25;
   d_mean(t)=regvar(t)-varmean(t);
END;
DROP t;

run;
* Scoring input: estimation sample plus group 3, restricted to rows that
  have group means available;
Data temp;
set carsfinal2;
If group IN (1,3) and avg1 NE .;
run;
*Regressions for make model yrmade match;
* Demeaned log price on demeaned regressors, no intercept, estimation sample
  only.  d_nm_pwrbrake and d_nm_pwrsteer are not in this model.
  Coefficients are saved in est_1 and fitted values in reg_out1;
proc reg data=carsfinal2 OUTEST=est_1;
where group=1;
model d_ln_rtotpurx= d_vehage d_vehage_2 d_vehage_3 d_nm_autotran 
						 d_nm_aircar    
						 d_atx d_psx d_pbx d_acx 
						 d_ln_r_expend d_ed1 d_ed2 d_ed3 d_fam1 d_fam2 d_fam3 
						d_fam_size d_age_ref d_age_ref2 d_regn1-d_regn3/noint;
   OUTPUT OUT=reg_out1 P=yhat R=resid;
RUN;
* ADJUST FOR FACT THAT GENERATING PREDICTED VALUES FOR LOGS (SEE BABY WOOLDRIDGE P. 208);
* m_hat adds the group mean log price (avg1) back before exponentiating;
DATA alpha_est;
SET reg_out1(KEEP=yhat ln_rtotpurx avg1);
  m_hat=EXP(yhat+avg1);
  y=EXP(ln_rtotpurx);
KEEP m_hat y;
RUN;
* Regress the price level on the exponentiated prediction with no intercept.
  The m_hat coefficient in alpha1 is the scale factor applied below;
PROC REG DATA=alpha_est OUTEST=alpha1;
   MODEL y = m_hat /noint;
RUN;
QUIT;

* Apply the est_1 coefficients to the rows in temp.  The score is written to
  the variable model1, and id and avg1 are carried along;
Proc score data=temp Score=est_1 Out=pred_1 type=parms;
id id avg1;
var d_vehage d_vehage_2 d_vehage_3 d_nm_autotran 
						 d_nm_aircar    
						 d_atx d_psx d_pbx d_acx 
						 d_ln_r_expend d_ed1 d_ed2 d_ed3 d_fam1 d_fam2 d_fam3 
						d_fam_size d_age_ref d_age_ref2 d_regn1-d_regn3;
run;
*Predicted values (purchase prices) for make model year-made matches;
* The single row of alpha1 is read once and its value stays on every row;
DATA pred_1;
IF _N_=1 THEN SET alpha1(KEEP=m_hat RENAME=(m_hat=alpha_adj1));
SET pred_1;
predval1=EXP(model1+avg1)*alpha_adj1;
RUN;
PROC MEANS;
RUN;
* Sorted by id for the final merge back onto the master file;
PROC SORT DATA=pred_1;
BY id;
RUN;



**************************************************************************
Section 9
**************************************************************************;
*Regressions for Make yrmade match;
* Working file for the make / year-made model: estimation sample (group 1)
  plus the vehicles to be predicted at this level (group 4);
Data temp_mk;
set carsfinal2;
If group IN(1 4);
Keep ln_rtotpurx vehage vehage_2 vehage_3 nm_autotran nm_pwrbrake nm_aircar nm_pwrsteer 
		 atx psx pbx acx 
		ln_r_expend ed1 ed2 ed3 fam1 fam2 fam3 fam_size age_ref age_ref2 regn1-regn3 makeyr_id group id;
run;

proc sort data=temp_mk;
by makeyr_id;
run;
*Calculate the means for estimation sample to compute predicted values;
* mk_avg1-mk_avg25 follow the order of the 25 VAR variables.  mk_mmy_std1 and
  mk_mmy_std2 are the standard deviations of ln_rtotpurx and vehage;
Proc means noprint Data=temp_mk;
by makeyr_id;
where group IN(1);
  VAR ln_rtotpurx vehage vehage_2 vehage_3 nm_autotran nm_pwrbrake nm_aircar nm_pwrsteer 
		 atx psx pbx acx 
		ln_r_expend ed1 ed2 ed3 fam1 fam2 fam3 fam_size age_ref age_ref2 regn1-regn3;
   OUTPUT OUT=mean_dat2 MEAN=mk_avg1-mk_avg25 STD=mk_mmy_std1 mk_mmy_std2;
   run;
* Demean by make / year-made group, dropping rows without group means, then
  split: temp1 gets the estimation sample and temp2 the group 4 rows to score.
  The arrays are paired by position with the VAR list above;
DATA temp1 temp2;
   MERGE temp_mk(IN=in1) 
		 mean_dat2(KEEP=makeyr_id mk_avg1-mk_avg25 mk_mmy_std1 mk_mmy_std2 _FREQ_ RENAME=(_FREQ_=mk_count));
   BY makeyr_id;
	if in1;
	if mk_avg1 NE .;
ARRAY regvar(1:25) ln_rtotpurx vehage vehage_2 vehage_3 nm_autotran 
						nm_pwrbrake nm_aircar nm_pwrsteer  atx psx pbx acx  
						ln_r_expend ed1 ed2 ed3 fam1 fam2 fam3 fam_size age_ref 
						age_ref2 regn1-regn3;
ARRAY varmean(1:25) mk_avg1-mk_avg25;
ARRAY d_mean(1:25)	d2_ln_rtotpurx d2_vehage d2_vehage_2 d2_vehage_3 d2_nm_autotran 
						d2_nm_pwrbrake d2_nm_aircar d2_nm_pwrsteer  
						 d2_atx d2_psx d2_pbx d2_acx 
						 d2_ln_r_expend d2_ed1 d2_ed2 d2_ed3 d2_fam1 d2_fam2 d2_fam3 
						d2_fam_size d2_age_ref d2_age_ref2 d2_regn1-d2_regn3;

DO t=1 TO 25;
   d_mean(t)=regvar(t)-varmean(t);
END;
DROP t;


If group=1 then output temp1;
If group=4 then output temp2;
run;
* Demeaned log price regression at the make / year-made level, no intercept.
  Unlike the make-model regression this one includes d2_nm_pwrbrake -
  d2_nm_pwrsteer is still left out;
proc reg data=temp1 OUTEST=est_2;
model d2_ln_rtotpurx= d2_vehage d2_vehage_2 d2_vehage_3 d2_nm_autotran 
						d2_nm_pwrbrake d2_nm_aircar   
						 d2_atx d2_psx d2_pbx d2_acx 
						 d2_ln_r_expend d2_ed1 d2_ed2 d2_ed3 d2_fam1 d2_fam2 d2_fam3 
						d2_fam_size d2_age_ref d2_age_ref2 d2_regn1-d2_regn3/noint;
   OUTPUT OUT=reg_out2 P=yhat R=resid;
RUN;
* ADJUST FOR FACT THAT GENERATING PREDICTED VALUES FOR LOGS (SEE BABY WOOLDRIDGE P. 208);
* alpha_est is overwritten here - the make-model version is no longer needed;
DATA alpha_est;
SET reg_out2(KEEP=yhat ln_rtotpurx mk_avg1);
  m_hat=EXP(yhat+mk_avg1);
  y=EXP(ln_rtotpurx);
KEEP m_hat y;
RUN;
PROC REG DATA=alpha_est OUTEST=alpha2;
   MODEL y = m_hat /noint;
RUN;
QUIT;

* Apply the est_2 coefficients to the group 4 rows.  The score is written to
  the variable model1, which is renamed to model2 in the next step;
Proc score data=temp2 Score=est_2 Out=pred_2 type=PARMS;
id id mk_avg1;
var d2_vehage d2_vehage_2 d2_vehage_3 d2_nm_autotran 
						d2_nm_pwrbrake d2_nm_aircar   
						 d2_atx d2_psx d2_pbx d2_acx 
						 d2_ln_r_expend d2_ed1 d2_ed2 d2_ed3 d2_fam1 d2_fam2 d2_fam3 
						d2_fam_size d2_age_ref d2_age_ref2 d2_regn1-d2_regn3;
run;
*Predicted values (purchase prices) for make year-made matches;
DATA pred_2;
IF _N_=1 THEN SET alpha2(KEEP=m_hat RENAME=(m_hat=alpha_adj2));
SET pred_2(RENAME=(model1=model2));
predval2=EXP(model2+mk_avg1)*alpha_adj2;
KEEP id predval2 model2;
RUN;
PROC MEANS;
RUN;

* Sorted by id for the final merge back onto the master file;
PROC SORT DATA=pred_2;
   BY id;
RUN;



**************************************************************************
Section 10
**************************************************************************;
*Predict Values for those without any make or yearmade match;
*Use Vehicyb information;
* Fallback model for vehicles with no make or year-made match.  Builds
  vehicle-type dummies from vehicyb and splits the rows: temp35 gets the
  estimation sample (group 1) and temp3 the rows to predict (group 5);
Data temp3 temp35;
set carsfinal2;
* NOTE(review): the original comment here referred to Group=9, but no group 9
  is assigned anywhere in this program - the filter below keeps groups 1 and 5;
If group IN(1 5);
If vehicyb=01 then vtype1=1;*automobile;
	Else vtype1=0;
If vehicyb=02 then vtype2=1;*truck;
	Else vtype2=0;
If vehicyb=03 then vtype3=1;*self-propelled camper;
	Else vtype3=0; 
If vehicyb=04 then vtype4=1;*Trailer type camper;
	Else vtype4=0;
If vehicyb=05 then vtype5=1;*other attachable camper;
	Else vtype5=0;
If vehicyb=06 then vtype6=1;*motorcycle or motorscooter;
	Else vtype6=0;
If vehicyb=07 then vtype7=1;*boat;
	Else vtype7=0;
If vehicyb=08 then vtype8=1;*Trailer other than camper type;
	Else vtype8=0;
If vehicyb=09 then vtype9=1;*Private Planes, Gliders;
	Else vtype9=0;
If vehicyb=10 then vtype10=1;*Any other vehicle;
	Else vtype10=0;

Keep vtype1-vtype10 id group ln_rtotpurx nm_autotran nm_pwrbrake nm_aircar atx psx pbx acx  
						ln_r_expend ed1 ed2 ed3 fam1 fam2 fam3 fam_size age_ref 
						age_ref2 regn1-regn3 yeardummy;

If group=5 then output temp3;
If group=1 then output temp35;
run;
* Log price in levels (not demeaned) with an intercept.  Only vtype 1, 2, 3,
  6 and 10 enter the model;
Proc reg Data=temp35 Outest=est_3;
Model ln_rtotpurx=  nm_autotran nm_aircar   atx psx pbx acx  
						ln_r_expend ed1 ed2 ed3 fam1 fam2 fam3 fam_size age_ref 
						age_ref2 regn1-regn3 vtype1 vtype2 vtype3 vtype6 vtype10 yeardummy;
   OUTPUT OUT=reg_out3 P=yhat R=resid;
RUN;
* ADJUST FOR FACT THAT GENERATING PREDICTED VALUES FOR LOGS (SEE BABY WOOLDRIDGE P. 208);
DATA alpha_est;
SET reg_out3(KEEP=yhat ln_rtotpurx);
  m_hat=EXP(yhat);
  y=EXP(ln_rtotpurx);
KEEP m_hat y;
RUN;
PROC REG DATA=alpha_est OUTEST=alpha3;
   MODEL y = m_hat /noint;
RUN;
QUIT;

*Predicted values (purchase prices) for those observations that we can not match
on make model year-made matches or make year-made matches;
* The score is written to the variable model1, renamed to model3 below;
Proc Score Data=temp3 SCORE=est_3 OUT=pred_3 TYPE=PARMS;
	ID id;
	Var  nm_autotran nm_aircar   atx psx pbx acx  
						ln_r_expend ed1 ed2 ed3 fam1 fam2 fam3 fam_size age_ref 
						age_ref2 regn1-regn3 vtype1 vtype2 vtype3 vtype6 vtype10 yeardummy;
run;

DATA pred_3;
IF _N_=1 THEN SET alpha3(KEEP=m_hat RENAME=(m_hat=alpha_adj3));
SET pred_3(RENAME=(model1=model3));
*PUT CEILING ON PRED_VAL, BECAUSE ABOUT 2% HAVE LARGE PREDICTED VALUE FOR MODEL3;
* The predicted log price is capped at 10 before exponentiating;
IF model3 NE . THEN predval3=EXP(MIN(10,model3))*alpha_adj3;
KEEP id predval3 model3;
RUN;
PROC MEANS;
RUN;

* Sorted by id for the final merge back onto the master file;
PROC SORT DATA=pred_3;
BY id;
RUN;


**************************************************************************
Section 11
**************************************************************************;
*Estimate Depreciation Rates;
* Depreciation sample: estimation-sample vehicles in make-model-year-made
  groups with at least two vehicles, plus age-by-make interaction terms;
Data temp4;
set carsfinal2;
*restrict to mmy groups that have at least 2 vehicles that are not the same price;
* NOTE(review): mmy_std2 is the within-group standard deviation of vehage -
  the price standard deviation is mmy_std1.  Confirm whether the filter is
  meant to require variation in age (as coded) or in price (as the comment says);
if group=1 and mmy_count GE 2 and mmy_std2 GT 0;

* new1 = age 0 or 1 and vehnewu=1 - new2 = age 0 or 1 and vehnewu=2 -
  new3 = everything else;
IF vehage IN (0,1) and vehnewu=1 THEN new1=1;
	ELSE new1=0;
IF vehage IN (0,1) and vehnewu=2 THEN new2=1;
	ELSE new2=0;

new3=1-new1-new2;
*No spline or decade variables;

ageXnew3=vehage*new3;

*There are 17 different makes in TEMP4;
* ageXvmake1-ageXvmake29 are vehicle age interacted with each make dummy;
ARRAY ageXvmake(1:29);
ARRAY vmake_tmp(1:29) vmake_dum1-vmake_dum29;

DO t=1 to 29;
ageXvmake(t)=vehage*vmake_tmp(t);
End;
Drop t;

Keep ln_rtotpurx vmake_adj id mmy_id2 vehage vehage_2 vehage_3 new1-new3 ageXnew3
     vmake_dum1-vmake_dum29 ageXvmake1-ageXvmake29;

run;
proc means data=temp4;
run;


proc sort data=temp4;
by mmy_id2;
run;
* WE DO NOT USE THE FULL SET OF VMAKE DUMMIES, B/C FOR THIS SUBSAMPLE NOT ALL VMAKES ARE PRESENT;
* MEAN=0 within each mmy_id2 group demeans the listed variables, which is why
  the regressions below are run without an intercept;
PROC STANDARD Data=temp4  OUT=std_depreciation MEAN=0;
by mmy_id2;
Var ln_rtotpurx vehage vehage_2 vehage_3 vmake_dum1-vmake_dum14 vmake_dum18 vmake_dum29 
	new1 ageXnew3 ageXvmake1-ageXvmake14 ageXvmake18 ageXvmake29;

run;
*In estimation of vflows we only use the depreciation rate we estimate from this equation;
* est_4 holds the linear age coefficient, later read as beta1;
Proc Reg Data=std_depreciation OUTEST=est_4;
MODEL ln_rtotpurx=vehage/noint;
run;

* est_5 holds the cubic age coefficients, later read as beta2-beta4;
Proc Reg Data=std_depreciation OUTEST=est_5;
MODEL ln_rtotpurx=vehage vehage_2 vehage_3 /noint;
run;

*Proc Reg Data=std_depreciation OUTEST=est_7;
*MODEL ln_rtotpurx=vehage ageXvmake1-ageXvmake14 ageXvmake18 ageXvmake29/noint;
*run;
*Merge All Info Back With Master Data;
* Merge all three sets of predictions back onto the master file by row id;
PROC SORT DATA=carsfinal2;
   BY id;
RUN;

* Each prediction file contributes its score and predicted price.  A vehicle
  only has values from the models it was scored with.  The result is stored
  in the permanent library with a reduced variable list;
DATA carslib.carsfinal3;
   MERGE carsfinal2(IN=in1)
         pred_1(KEEP=id model1 predval1)
         pred_2(KEEP=id model2 predval2)
         pred_3(KEEP=id model3 predval3);
   BY id;

   KEEP id newid model1 predval1 predval2 model2 predval3  ln_rtotpurx totpurx mmy_id2 year
        group vehicyb vehage vehage_2 vehage_3 vmake_adj yrmade own_for vehpuryr vehnewu
        r_totpurx atx psx pbx acx vehgftc model ;
RUN;

PROC MEANS;
RUN;

**************************************************************************
Section 12
**************************************************************************;
* Attach the estimated depreciation coefficients to every vehicle, pick the
  best available predicted price, and compute annual vehicle service flows;
Data carslib.carsfinal3;
* est_4 and est_5 each hold one row of coefficients.  They are read once and
  the values stay on every row because SET variables are retained;
If _N_=1 then set est_4(Keep=vehage Rename=(vehage=beta1));
If _N_=1 then set est_5(Keep=vehage vehage_2 vehage_3 
					    Rename=(vehage=beta2 vehage_2=beta3 vehage_3=beta4));
*No spline decade or ageXvmake regressions - lack of data;

set carslib.carsfinal3;

If vehage IN(0,1) and vehnewu=1 then new1=1;
else new1=0;
If vehage IN(0,1) and vehnewu=2 then new2=1;
else new2=0;
* FIX: was new1-new2, which gave -1, 0 or 1.  new3 is the indicator for
  vehicles that are neither new1 nor new2, as defined in Section 11;
new3=1-new1-new2;

*No spline or spline interaction terms or decade variables;

* Prefer the make-model-year-made prediction, then make-year-made, then the
  fallback model.  predval stays missing when none is available;
If predval1 NE . then predval=predval1;
	Else if predval2 NE . then predval=predval2;
	Else if predval3 NE . then predval=predval3;


**************************************************************************
CALCULATE CPI-U-RS FOR CARS, COMES FROM PRICES_CARS.XLS
ONLY GO BACK TO 1969 B/C THIS IS FOR GROUP=2, WHICH IS RESTRICTED TO 1969-1971 
**************************************************************************;

cpiu_rs_cars1=26.448;  *1969;
cpiu_rs_cars2=26.882;
cpiu_rs_cars3=28.289;
cpiu_rs_cars4=28.257;
cpiu_rs_cars5=29.551;  *1973;


* Need to discount reported purchase price by the number of years since purchase
  using the CPI-U_RS;
ARRAY p(1:5) cpiu_rs_cars1-cpiu_rs_cars5;

* Index t corresponds to calendar year 1968+t.  purch_cpi stays missing for
  purchase years outside 1969-1973, which only matters for group 2;
DO t=1 TO 5;   
   IF year-1968=t THEN surv_cpi=p(t);
   IF vehpuryr-1968=t THEN purch_cpi=p(t);
END; 
 
DROP t cpiu_rs_cars1-cpiu_rs_cars5;

* delta2 is the depreciation rate implied by the estimated linear age coefficient;
delta2=1-EXP(beta1);
*THIS DEPRECIATION RATE TAKEN FROM 1980-2005 ESTIMATES B/C INSUFFICIENT
 DATA ON CARS WITH DIFFERENT AGES IN THE 72-73 DATA;
delta=0.146;


**************************************************************************
CALCULATE SERVICE FLOW IN 1973 DOLLARS
(vflow1 uses the fixed delta, vflow1b uses the estimated delta2)
**************************************************************************;
IF group=1 THEN DO;
	vflow1=r_totpurx*delta;
	vflow1b=r_totpurx*delta2;
	*vflow2=r_totpurx*((-1*beta2)+2*(-1*beta3)*vehage+3*(-1*beta4)*vehage**2);

END;
IF group IN(2) THEN DO;
* NOTE r_totpurx=totpurx*1973_cpi/surv_cpi, SO r_totpurx*surv_cpi/purch_cpi PUTS VEHICLE
  VALUE IN REAL 1973 DOLLARS FOR CARS PURCHASED MORE THAN 12 MONTHS BEFORE INTERVIEW;
* The purchase price is also depreciated for the whole years owned;
	vflow1=r_totpurx*surv_cpi/purch_cpi*delta*(1-delta)**ROUND(own_for/12);
	vflow1b=r_totpurx*surv_cpi/purch_cpi*delta2*(1-delta2)**ROUND(own_for/12);
*	vflow2=totpurx*(beta2+2*beta3*vehage+3*beta4*vehage**2)
				  *(1-(beta2+2*beta3*vehage+3*beta4*vehage**2))**(own_for/12);
*	vflow3=totpurx*(beta5+beta6*ageXdecade2+beta7*ageXdecade3
						+beta8*ageXdecade4+beta9*ageXdecade5)
				  *(1-(beta5+beta6*ageXdecade2+beta7*ageXdecade3
						+beta8*ageXdecade4+beta9*ageXdecade5))**(own_for/12);
END;
IF group IN (3 4 5) THEN DO;
	vflow1=predval*delta;
	vflow1b=predval*delta2;
	*vflow2=predval*((-1*beta2)+2*(-1*beta3)*vehage+3*(-1*beta4)*vehage**2);
	*vflow3=predval*(beta5+beta7*spline1+beta9*spline2+beta11*spline3)*(-1);
	*vflow4=predval*(beta12+beta13*decade_2+beta14*decade_3)*(-1);
END;

*** GENERATE SURVEY YEAR THAT MATCHES WITH FAMILY FILE;
*srv_year=floor(qyear/10)-1900;

RUN;
PROC MEANS DATA=carslib.carsfinal3;
RUN;

PROC SORT DATA=carslib.carsfinal3;
   BY newid ;
RUN;
* NOTE, WE USE THE ESTIMATED DELTA, NOT THE ONE FROM 1980;
* Sum the per-vehicle flows (vflow1b) within each newid.  _FREQ_ in tot_vdat
  is the number of vehicle records for that newid;
PROC MEANS SUM NOPRINT DATA=carslib.carsfinal3;
   BY newid ;
   VAR vflow1b;
   OUTPUT OUT=tot_vdat SUM=tot_vflow1;
RUN;


**************************************************************************
 Section 13 - MAKE SAS DATASET TO MERGE WITH FAMILY FILE
**************************************************************************;
* One row per newid with the total vehicle service flow (tot_vflow1), the
  number of vehicle records (num_cars) and the flow in nominal dollars;
data carslib.car_flows7273;
   merge carslib.carsfinal3(keep=newid year)
         tot_vdat(keep=newid tot_vflow1 _FREQ_ rename=(_FREQ_=num_cars));
   by newid;

   * carsfinal3 has one row per vehicle - keep only the first row per newid;
   if not first.newid then delete;

   * Put values back in nominal dollars of the survey year - 1972 or 1973.
     Uses the CPI-U-RS for cars from PRICES_CARS.XLS, a weighted average of
     the new and used car series given that about 60% of all cars are
     purchased used.  Flows are already in 1973 dollars, so only 1972 is
     deflated.  Any other year leaves n_tot_vflow1 missing;
   select (year);
      when (1972) n_tot_vflow1 = tot_vflow1/1.0458;
      when (1973) n_tot_vflow1 = tot_vflow1;
      otherwise;
   end;

run;

proc means data=carslib.car_flows7273;
run;



* Check against the stored backup copy: summary statistics, then a
  comparison without the listing of individual value differences;
proc means data=carslib.car_flows7273_bak;
run;

proc compare base=carslib.car_flows7273
             compare=carslib.car_flows7273_bak
             novalues;
run;



